import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
# Read the merged life-expectancy dataset (path is relative to the working directory)
df = pd.read_csv("Life_expectancy_merged.csv")
# Bare expression: shows the DataFrame preview as the cell output when run in a notebook
df
| Country | Region | Year | Infant_deaths | Under_five_deaths | Adult_mortality | Alcohol_consumption | Hepatitis_B | Measles | BMI | ... | Population_mln | Thinness_ten_nineteen_years | Thinness_five_nine_years | Schooling | Economy_status_Developed | Economy_status_Developing | Life_expectancy | Climate_Zone | AQI | Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Turkiye | Middle East | 2015 | 11.1 | 13.0 | 105.8240 | 1.320 | 97 | 65 | 27.8 | ... | 78.53 | 4.9 | 4.8 | 7.8 | 0 | 1 | 76.5 | Temperate | 124.603599 | Unhealthy |
| 1 | Spain | European Union | 2015 | 2.7 | 3.3 | 57.9025 | 10.350 | 97 | 94 | 26.0 | ... | 46.44 | 0.6 | 0.5 | 9.7 | 1 | 0 | 82.8 | Mediterranean | 77.987395 | Moderate |
| 2 | India | Asia | 2007 | 51.5 | 67.9 | 201.0765 | 1.570 | 60 | 35 | 21.2 | ... | 1183.21 | 27.1 | 28.0 | 5.0 | 0 | 1 | 65.4 | Diverse | 66.966041 | Moderate |
| 3 | Guyana | South America | 2006 | 32.8 | 40.5 | 222.1965 | 5.680 | 93 | 74 | 25.3 | ... | 0.75 | 5.7 | 5.5 | 7.9 | 0 | 1 | 67.0 | Tropical | 87.566295 | Moderate |
| 4 | Israel | Middle East | 2012 | 3.4 | 4.3 | 57.9510 | 2.890 | 97 | 89 | 27.0 | ... | 7.91 | 1.2 | 1.1 | 12.8 | 1 | 0 | 81.7 | Dry | 100.953959 | Hazardous |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2859 | Niger | Africa | 2000 | 97.0 | 224.9 | 291.8240 | 0.092 | 72 | 64 | 20.8 | ... | 11.33 | 12.8 | 12.9 | 1.1 | 0 | 1 | 49.9 | Tropical | 104.715199 | Unhealthy |
| 2860 | Mongolia | Asia | 2009 | 23.9 | 28.6 | 235.2330 | 6.560 | 97 | 97 | 25.3 | ... | 2.67 | 2.2 | 2.3 | 9.1 | 0 | 1 | 66.9 | Dry | 34.276856 | Good |
| 2861 | Sri Lanka | Asia | 2004 | 17.7 | 28.9 | 134.8950 | 1.560 | 62 | 95 | 21.9 | ... | 19.39 | 15.4 | 15.5 | 10.3 | 0 | 1 | 74.3 | Tropical | 90.565837 | Moderate |
| 2862 | Lithuania | European Union | 2002 | 7.9 | 9.9 | 204.0120 | 11.000 | 94 | 95 | 26.1 | ... | 3.44 | 3.3 | 3.3 | 11.1 | 1 | 0 | 71.8 | Temperate | 102.015136 | Unhealthy |
| 2863 | Iceland | Rest of Europe | 2011 | 2.1 | 2.6 | 50.5745 | 6.840 | 88 | 90 | 26.1 | ... | 0.32 | 0.9 | 0.9 | 11.0 | 1 | 0 | 82.4 | Temperate | 82.321342 | Moderate |
2864 rows × 24 columns
# Selecting relevant columns
columns = ['Country', 'Year', 'Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B',
           'Polio', 'Diphtheria', 'Climate_Zone', 'Economy_status_Developing']
# Keep records from 2013 onward, restricted to the relevant columns, and remove
# rows containing NaN values. Doing this in a single .loc selection with a
# non-inplace dropna() and an explicit copy makes `data` an independent frame,
# so neither this step nor later column assignments act on a slice of `df`
# (the pattern that triggers pandas' SettingWithCopyWarning).
data = df.loc[df['Year'] >= 2013, columns].dropna().copy()
data.head()
| Country | Year | Under_five_deaths | GDP_per_capita | Hepatitis_B | Polio | Diphtheria | Climate_Zone | Economy_status_Developing | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Turkiye | 2015 | 13.0 | 11006 | 97 | 97 | 97 | Temperate | 1 |
| 1 | Spain | 2015 | 3.3 | 25742 | 97 | 97 | 97 | Mediterranean | 0 |
| 6 | Russian Federation | 2015 | 8.2 | 9313 | 97 | 97 | 97 | Diverse | 1 |
| 16 | Finland | 2013 | 2.7 | 43045 | 88 | 98 | 98 | Temperate | 0 |
| 25 | Belize | 2013 | 16.9 | 4667 | 95 | 95 | 95 | Tropical | 1 |
def remove_outliers(df, column_list):
    """Drop rows whose values fall outside the 1.5 * IQR fences.

    Columns are processed in the order given, and each column's quartiles are
    computed on the rows that survived the filters of the preceding columns,
    so the result can depend on the order of ``column_list``.

    Args:
        df: DataFrame to filter. The caller's object is not modified; a
            filtered frame is returned instead.
        column_list: Names of the numeric columns to screen for outliers.

    Returns:
        DataFrame containing only the rows that lie within
        ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (bounds inclusive) for every
        listed column. Rows with NaN in a listed column are dropped because
        the comparisons evaluate to False for them.
    """
    for column in column_list:
        Q1 = df[column].quantile(0.25)
        Q3 = df[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        # Rebinding the local name keeps the caller's frame untouched
        df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df
# Removing outliers from the relevant columns
# Only the numeric indicator columns are screened; Country, Year, Climate_Zone and
# Economy_status_Developing are not passed to the IQR filter.
outlier_columns = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']
data = remove_outliers(data, outlier_columns)
# Bare expression: shows the filtered frame as the cell output when run in a notebook
data
| Country | Year | Under_five_deaths | GDP_per_capita | Hepatitis_B | Polio | Diphtheria | Climate_Zone | Economy_status_Developing | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Turkiye | 2015 | 13.0 | 11006 | 97 | 97 | 97 | Temperate | 1 |
| 1 | Spain | 2015 | 3.3 | 25742 | 97 | 97 | 97 | Mediterranean | 0 |
| 6 | Russian Federation | 2015 | 8.2 | 9313 | 97 | 97 | 97 | Diverse | 1 |
| 25 | Belize | 2013 | 16.9 | 4667 | 95 | 95 | 95 | Tropical | 1 |
| 27 | Cameroon | 2015 | 88.0 | 1383 | 84 | 77 | 84 | Tropical | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2840 | Djibouti | 2015 | 65.8 | 2653 | 84 | 84 | 84 | Desert | 1 |
| 2843 | Kiribati | 2014 | 59.2 | 1417 | 75 | 79 | 75 | Tropical | 1 |
| 2846 | Nicaragua | 2015 | 19.4 | 2050 | 98 | 99 | 98 | Tropical | 1 |
| 2848 | Bahrain | 2015 | 7.6 | 22634 | 98 | 98 | 98 | Desert | 1 |
| 2854 | Fiji | 2013 | 23.7 | 4902 | 99 | 99 | 99 | Tropical | 1 |
404 rows × 9 columns
# Summary statistics of the numeric columns of the outlier-filtered data
data.describe()
| Year | Under_five_deaths | GDP_per_capita | Hepatitis_B | Polio | Diphtheria | Economy_status_Developing | |
|---|---|---|---|---|---|---|---|
| count | 404.000000 | 404.000000 | 404.000000 | 404.000000 | 404.000000 | 404.000000 | 404.000000 |
| mean | 2013.995050 | 30.437871 | 7229.750000 | 91.564356 | 91.349010 | 91.732673 | 0.873762 |
| std | 0.818505 | 26.519279 | 7138.525863 | 7.273346 | 8.013294 | 7.352373 | 0.332529 |
| min | 2013.000000 | 2.600000 | 306.000000 | 70.000000 | 65.000000 | 72.000000 | 0.000000 |
| 25% | 2013.000000 | 10.350000 | 1727.500000 | 88.000000 | 87.000000 | 88.000000 | 1.000000 |
| 50% | 2014.000000 | 19.300000 | 4833.500000 | 94.000000 | 94.000000 | 94.000000 | 1.000000 |
| 75% | 2015.000000 | 49.425000 | 10095.750000 | 97.000000 | 98.000000 | 98.000000 | 1.000000 |
| max | 2015.000000 | 108.300000 | 32136.000000 | 99.000000 | 99.000000 | 99.000000 | 1.000000 |
# Normalizing the data
# Min-max scaling is applied to the numeric indicator columns. NOTE: the scaled
# values overwrite the raw ones in `data`, so every later cell (tests, plots,
# maps) works with scaled values rather than original units.
normalization_columns = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']
scaler = MinMaxScaler()
# `data` was produced by boolean filtering; take an explicit copy so the column
# assignment below cannot raise SettingWithCopyWarning or act on a view.
data = data.copy()
data[normalization_columns] = scaler.fit_transform(data[normalization_columns])
# Display the normalized data
print("Normalized Data:")
print(data[normalization_columns].head())
Normalized Data:
Under_five_deaths GDP_per_capita Hepatitis_B Polio Diphtheria
0 0.098392 0.336161 0.931034 0.941176 0.925926
1 0.006623 0.799120 0.931034 0.941176 0.925926
6 0.052980 0.282972 0.931034 0.941176 0.925926
25 0.135289 0.137009 0.862069 0.882353 0.851852
27 0.807947 0.033836 0.482759 0.352941 0.444444
import matplotlib.pyplot as plt
import seaborn as sns

# Uses `data` and the column names in `normalization_columns`.
# Draw one histogram (20 bins, KDE overlay) per normalized column, each in its own figure.
for column in normalization_columns:
    plt.figure(figsize=(8, 4))
    # histplot draws on the current axes and returns them
    axes = sns.histplot(data[column], bins=20, kde=True)
    axes.set_title(f'Histogram of Normalized {column}')
    axes.set_xlabel(column)
    axes.set_ylabel('Frequency')
    plt.show()
import scipy.stats as stats

# Q-Q plot of every normalized column against a theoretical normal distribution,
# one 6x6 figure per column.
for column in normalization_columns:
    plt.figure(figsize=(6, 6))
    # plot=plt makes probplot render onto the current pyplot figure
    stats.probplot(data[column], dist="norm", plot=plt)
    plt.title(f'Q-Q Plot of Normalized {column}')
    plt.show()
Null hypothesis (H0): the data are normally distributed. Alternative hypothesis (H1): the data are not normally distributed.
import pandas as pd
from scipy.stats import shapiro

# Columns of `data` (already normalized above) to test for normality
columns_to_test = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']
# Significance level used to decide whether H0 (normality) is rejected
alpha = 0.05
# Run the Shapiro-Wilk test column by column and report the verdict
for column in columns_to_test:
    stat, p = shapiro(data[column])
    print(f'Normality test for {column}: Statistics={stat:.3f}, p={p:.3f}')
    # Interpretation: p above alpha means H0 cannot be rejected
    verdict = (f' {column} looks Gaussian (fail to reject H0)' if p > alpha
               else f' {column} does not look Gaussian (reject H0)')
    print(verdict)
Normality test for Under_five_deaths: Statistics=0.851, p=0.000 Under_five_deaths does not look Gaussian (reject H0) Normality test for GDP_per_capita: Statistics=0.828, p=0.000 GDP_per_capita does not look Gaussian (reject H0) Normality test for Hepatitis_B: Statistics=0.866, p=0.000 Hepatitis_B does not look Gaussian (reject H0) Normality test for Polio: Statistics=0.851, p=0.000 Polio does not look Gaussian (reject H0) Normality test for Diphtheria: Statistics=0.856, p=0.000 Diphtheria does not look Gaussian (reject H0)
The results from your Shapiro-Wilk normality tests indicate that the data in all the tested columns ('Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', and 'Diphtheria') do not follow a normal distribution (Gaussian distribution). This conclusion is drawn from the fact that the p-values for all these columns are very small (0.000), leading to the rejection of the null hypothesis that the data is normally distributed.
Given that the data are not normally distributed, a combination of non-parametric statistical tests, possibly supplemented by multivariate analysis, is appropriate.
import pandas as pd
from itertools import combinations
from scipy.stats import spearmanr

# Columns of `data` to correlate pairwise
columns_to_correlate = ['GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria', 'Under_five_deaths']
# Calculating Spearman's Rank Correlation.
# The coefficient is symmetric in its two arguments, so each unordered pair is
# tested exactly once instead of once per ordering (which printed every
# result twice).
for col1, col2 in combinations(columns_to_correlate, 2):
    coef, p = spearmanr(data[col1], data[col2])
    print(f"Spearman correlation between {col1} and {col2}: Coefficient={coef:.3f}, P-value={p:.3f}")
Spearman correlation between GDP_per_capita and Hepatitis_B: Coefficient=0.340, P-value=0.000 Spearman correlation between GDP_per_capita and Polio: Coefficient=0.391, P-value=0.000 Spearman correlation between GDP_per_capita and Diphtheria: Coefficient=0.410, P-value=0.000 Spearman correlation between GDP_per_capita and Under_five_deaths: Coefficient=-0.823, P-value=0.000 Spearman correlation between Hepatitis_B and GDP_per_capita: Coefficient=0.340, P-value=0.000 Spearman correlation between Hepatitis_B and Polio: Coefficient=0.884, P-value=0.000 Spearman correlation between Hepatitis_B and Diphtheria: Coefficient=0.936, P-value=0.000 Spearman correlation between Hepatitis_B and Under_five_deaths: Coefficient=-0.427, P-value=0.000 Spearman correlation between Polio and GDP_per_capita: Coefficient=0.391, P-value=0.000 Spearman correlation between Polio and Hepatitis_B: Coefficient=0.884, P-value=0.000 Spearman correlation between Polio and Diphtheria: Coefficient=0.919, P-value=0.000 Spearman correlation between Polio and Under_five_deaths: Coefficient=-0.493, P-value=0.000 Spearman correlation between Diphtheria and GDP_per_capita: Coefficient=0.410, P-value=0.000 Spearman correlation between Diphtheria and Hepatitis_B: Coefficient=0.936, P-value=0.000 Spearman correlation between Diphtheria and Polio: Coefficient=0.919, P-value=0.000 Spearman correlation between Diphtheria and Under_five_deaths: Coefficient=-0.495, P-value=0.000 Spearman correlation between Under_five_deaths and GDP_per_capita: Coefficient=-0.823, P-value=0.000 Spearman correlation between Under_five_deaths and Hepatitis_B: Coefficient=-0.427, P-value=0.000 Spearman correlation between Under_five_deaths and Polio: Coefficient=-0.493, P-value=0.000 Spearman correlation between Under_five_deaths and Diphtheria: Coefficient=-0.495, P-value=0.000
Interpretation:
Positive Correlations with GDP Per Capita: There are positive correlations between GDP per capita and immunization rates (Hepatitis B, Polio, Diphtheria), suggesting that higher economic status is generally associated with better immunization coverage.
Strong Positive Correlations Among Immunization Rates: The immunization rates for Hepatitis B, Polio, and Diphtheria are highly correlated with each other, indicating that improvements in healthcare access in one area often accompany improvements in others.
Negative Correlations with Under-Five Deaths: Under-five mortality is strongly negatively correlated with GDP per capita (coefficient -0.823) and moderately negatively correlated with the immunization rates (coefficients between -0.427 and -0.495). This indicates that higher economic status and better immunization coverage are associated with lower under-five mortality rates.
In summary, these correlations suggest a significant association between economic status, healthcare access (as measured by immunization rates), and child mortality, where better economic conditions and improved healthcare access are linked to lower under-five mortality rates.
from scipy.stats import kruskal

# Compare 'Under_five_deaths' across the groups defined by the 'Climate_Zone' column of `data`
climate_zones = data['Climate_Zone'].unique()
# One sample of under-five deaths per climate zone
grouped_data = [
    data.loc[data['Climate_Zone'] == zone, 'Under_five_deaths']
    for zone in climate_zones
]
# Kruskal-Wallis H-test across all zone samples at once
stat, p = kruskal(*grouped_data)
print(f"Kruskal-Wallis Test: Statistics={stat:.3f}, p={p:.3f}")
Kruskal-Wallis Test: Statistics=152.658, p=0.000
Statistical Significance: The very low p-value (0.000) suggests that there are statistically significant differences in under-five mortality rates among the various climate zones in your dataset.
Climate Zone Impact: This finding implies that the climate zone is a factor that differentiates under-five mortality rates. It supports the idea that the mortality rate for children under five is not uniform across different climate zones.
Post-Hoc Analysis: Since the Kruskal-Wallis test indicates that there are differences but does not specify between which climate zones these differences occur, you might consider conducting post-hoc tests. Methods like the Dunn's test can be used to compare specific pairs of climate zones to identify where the significant differences lie.
pip install statsmodels
Requirement already satisfied: statsmodels in c:\users\palla\anaconda3\lib\site-packages (0.13.2) Requirement already satisfied: numpy>=1.17 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.21.5) Requirement already satisfied: scipy>=1.3 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.9.1) Requirement already satisfied: pandas>=0.25 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.4.4) Requirement already satisfied: patsy>=0.5.2 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (0.5.2) Requirement already satisfied: packaging>=21.3 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (21.3) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\users\palla\anaconda3\lib\site-packages (from packaging>=21.3->statsmodels) (3.0.9) Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=0.25->statsmodels) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=0.25->statsmodels) (2022.1) Requirement already satisfied: six in c:\users\palla\anaconda3\lib\site-packages (from patsy>=0.5.2->statsmodels) (1.16.0) Note: you may need to restart the kernel to use updated packages.
import pandas as pd
from scipy.stats import kruskal
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison
# Expects `data` to contain the columns 'Climate_Zone' and 'Under_five_deaths'
# Conducting Kruskal-Wallis Test (one sample of under-five deaths per climate zone)
climate_zones = data['Climate_Zone'].unique()
grouped_data = [data['Under_five_deaths'][data['Climate_Zone'] == zone] for zone in climate_zones]
stat, p = kruskal(*grouped_data)
print(f"Kruskal-Wallis Test: Statistics={stat:.3f}, p={p:.3f}")
# Conducting Tukey HSD post-hoc test on all pairs of climate zones.
# NOTE(review): the call below is Tukey's HSD (a comparison of group means), not
# Dunn's test. A rank-based post-hoc such as Dunn's would be more consistent with
# the Kruskal-Wallis test above - confirm which procedure is intended.
mc = MultiComparison(data['Under_five_deaths'], data['Climate_Zone'])
result = mc.tukeyhsd()
print(result)
# Unique group labels found in 'Climate_Zone'
print(mc.groupsunique)
Kruskal-Wallis Test: Statistics=152.658, p=0.000
Multiple Comparison of Means - Tukey HSD, FWER=0.05
==================================================================
group1 group2 meandiff p-adj lower upper reject
------------------------------------------------------------------
Desert Diverse -0.0171 0.9999 -0.2331 0.1989 False
Desert Dry 0.0833 0.5763 -0.0627 0.2294 False
Desert Mediterranean -0.2193 0.0059 -0.3967 -0.042 True
Desert Temperate -0.1319 0.0397 -0.2602 -0.0037 True
Desert Tropical 0.1032 0.1073 -0.0118 0.2182 False
Diverse Dry 0.1004 0.7584 -0.1131 0.3139 False
Diverse Mediterranean -0.2022 0.1408 -0.4383 0.0338 False
Diverse Temperate -0.1148 0.5795 -0.3166 0.087 False
Diverse Tropical 0.1203 0.4805 -0.0733 0.3139 False
Dry Mediterranean -0.3027 0.0 -0.477 -0.1283 True
Dry Temperate -0.2152 0.0 -0.3393 -0.0912 True
Dry Tropical 0.0198 0.9956 -0.0904 0.1301 False
Mediterranean Temperate 0.0874 0.6205 -0.0723 0.2472 False
Mediterranean Tropical 0.3225 0.0 0.1732 0.4718 True
Temperate Tropical 0.2351 0.0 0.1498 0.3204 True
------------------------------------------------------------------
['Desert' 'Diverse' 'Dry' 'Mediterranean' 'Temperate' 'Tropical']
The Tukey HSD post-hoc test results following the Kruskal-Wallis test show significant differences in under-five mortality rates among various climate zones. Key findings include:
Desert vs. Mediterranean and Temperate: Mortality rates are significantly lower in Mediterranean and Temperate zones compared to Desert zones.
Dry vs. Mediterranean and Temperate: Similarly, Mediterranean and Temperate zones have significantly lower mortality rates than Dry zones.
Mediterranean vs. Tropical: Tropical zones have significantly higher mortality rates compared to Mediterranean zones.
Temperate vs. Tropical: Temperate zones have significantly lower mortality rates than Tropical zones.
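Ordering of zones by mean under-five deaths (from the mean differences above): Tropical Zone > Dry Zone > Desert Zone > Diverse Zone > Temperate Zone > Mediterranean Zone. Note that the differences among the Tropical, Dry, Desert and Diverse zones are not statistically significant.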
Final Inference:
GDP per Capita and Healthcare Access: Countries with higher GDP per capita usually have better healthcare access, including higher immunization rates for Hepatitis B, Polio, and Diphtheria.
Vaccinations and Under-Five Deaths: Higher vaccination coverage is associated with fewer under-five deaths. This is consistent with better healthcare access helping to reduce child mortality, although correlation alone does not establish causation.
Different Climates, Different Results: Under-five deaths differ between climate zones. For instance, in Tropical climates, under-five deaths are higher compared to Mediterranean or Temperate climates. Whether the effect of GDP per capita and healthcare access on under-five deaths itself varies by climate zone was not tested in this analysis.
Climate's Influence: The study found that the climate zone plays a role in the number of under-five deaths in those areas.
Overall Finding: In developing countries, higher GDP per capita and better healthcare access, especially in terms of immunization, are linked to lower under-five deaths. However, the climate zone also influences these outcomes, with some climates having higher rates of under-five deaths than others.
### Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# --- GDP per capita against each immunization rate, overlaid on one set of axes ---
immunization_series = [
    ('Hepatitis_B', 'Hepatitis B'),
    ('Polio', 'Polio'),
    ('Diphtheria', 'Diphtheria'),
]
for rate_column, legend_label in immunization_series:
    sns.scatterplot(x='GDP_per_capita', y=rate_column, data=data, label=legend_label)
plt.xlabel('GDP per Capita')
plt.ylabel('Immunization Rate')
plt.title('GDP per Capita vs. Immunization Rates')
plt.legend()
plt.show()

# --- GDP per capita against under-five deaths ---
sns.scatterplot(x='GDP_per_capita', y='Under_five_deaths', data=data)
plt.xlabel('GDP per Capita')
plt.ylabel('Under-Five Deaths')
plt.title('GDP per Capita vs. Under-Five Deaths')
plt.show()

# --- Distribution of under-five deaths within each climate zone ---
sns.boxplot(x='Climate_Zone', y='Under_five_deaths', data=data)
plt.xlabel('Climate Zone')
plt.ylabel('Under-Five Deaths')
plt.title('Under-Five Deaths in Different Climate Zones')
# Rotate the zone names so the tick labels do not overlap
plt.xticks(rotation=45)
plt.show()

import numpy as np

# --- Spearman correlation heatmap of the numeric indicators ---
correlation_columns = ['GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria', 'Under_five_deaths']
correlation_matrix = data[correlation_columns].corr(method='spearman')
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Spearman Correlation Matrix')
plt.show()
pip install geopandas matplotlib
Collecting geopandas
Downloading geopandas-0.14.1-py3-none-any.whl (1.1 MB)
---------------------------------------- 1.1/1.1 MB 7.0 MB/s eta 0:00:00
Requirement already satisfied: matplotlib in c:\users\palla\anaconda3\lib\site-packages (3.5.2)
Collecting fiona>=1.8.21
Downloading fiona-1.9.5-cp39-cp39-win_amd64.whl (22.9 MB)
---------------------------------------- 22.9/22.9 MB 9.3 MB/s eta 0:00:00
Requirement already satisfied: pandas>=1.4.0 in c:\users\palla\anaconda3\lib\site-packages (from geopandas) (1.4.4)
Collecting shapely>=1.8.0
Downloading shapely-2.0.2-cp39-cp39-win_amd64.whl (1.4 MB)
---------------------------------------- 1.4/1.4 MB 11.4 MB/s eta 0:00:00
Requirement already satisfied: packaging in c:\users\palla\anaconda3\lib\site-packages (from geopandas) (21.3)
Collecting pyproj>=3.3.0
Downloading pyproj-3.6.1-cp39-cp39-win_amd64.whl (6.1 MB)
---------------------------------------- 6.1/6.1 MB 13.9 MB/s eta 0:00:00
Requirement already satisfied: fonttools>=4.22.0 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (4.25.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (2.8.2)
Requirement already satisfied: numpy>=1.17 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (1.21.5)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: pillow>=6.2.0 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (9.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (1.4.2)
Requirement already satisfied: attrs>=19.2.0 in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (21.4.0)
Requirement already satisfied: importlib-metadata in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (4.11.3)
Requirement already satisfied: six in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.16.0)
Requirement already satisfied: click~=8.0 in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (8.0.4)
Collecting click-plugins>=1.0
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Requirement already satisfied: certifi in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (2022.9.14)
Collecting cligj>=0.5
Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Requirement already satisfied: setuptools in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (63.4.1)
Requirement already satisfied: pytz>=2020.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2022.1)
Requirement already satisfied: colorama in c:\users\palla\anaconda3\lib\site-packages (from click~=8.0->fiona>=1.8.21->geopandas) (0.4.5)
Requirement already satisfied: zipp>=0.5 in c:\users\palla\anaconda3\lib\site-packages (from importlib-metadata->fiona>=1.8.21->geopandas) (3.8.0)
Installing collected packages: shapely, pyproj, cligj, click-plugins, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.9.5 geopandas-0.14.1 pyproj-3.6.1 shapely-2.0.2
Note: you may need to restart the kernel to use updated packages.
pip install plotly
Requirement already satisfied: plotly in c:\users\palla\anaconda3\lib\site-packages (5.9.0) Requirement already satisfied: tenacity>=6.2.0 in c:\users\palla\anaconda3\lib\site-packages (from plotly) (8.0.1) Note: you may need to restart the kernel to use updated packages.
pip install pycountry
Collecting pycountry
Downloading pycountry-22.3.5.tar.gz (10.1 MB)
--------------------------------------- 10.1/10.1 MB 11.4 MB/s eta 0:00:00
Installing build dependencies: started
Installing build dependencies: finished with status 'done'
Getting requirements to build wheel: started
Getting requirements to build wheel: finished with status 'done'
Preparing metadata (pyproject.toml): started
Preparing metadata (pyproject.toml): finished with status 'done'
Requirement already satisfied: setuptools in c:\users\palla\anaconda3\lib\site-packages (from pycountry) (63.4.1)
Building wheels for collected packages: pycountry
Building wheel for pycountry (pyproject.toml): started
Building wheel for pycountry (pyproject.toml): finished with status 'done'
Created wheel for pycountry: filename=pycountry-22.3.5-py2.py3-none-any.whl size=10681895 sha256=598b233dac8865bbcdfdebe84efae2d341d7a26cfe3e1554c68560856b284774
Stored in directory: c:\users\palla\appdata\local\pip\cache\wheels\47\15\92\e6dc85fcb0686c82e1edbcfdf80cfe4808c058813fed0baa8f
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-22.3.5
Note: you may need to restart the kernel to use updated packages.
import pycountry
import pandas as pd
# Assuming 'data' is your DataFrame
# Function to get ISO code for a country
def get_iso_code(country):
    """Return the ISO alpha-3 code pycountry holds for ``country``, or None.

    Args:
        country: Country name, matched against pycountry's ``name`` field.

    Returns:
        The ``alpha_3`` attribute of the matching pycountry record, or None
        when ``country`` is not a string or no record matches.
    """
    # Non-string input (e.g. NaN) cannot name a country; keep the best-effort None
    if not isinstance(country, str):
        return None
    try:
        match = pycountry.countries.get(name=country)
    except LookupError:
        # Some lookups fail by raising instead of returning None
        # (KeyError is a LookupError subclass).
        match = None
    # An explicit None check replaces the bare `except:`, which would also have
    # swallowed KeyboardInterrupt and unrelated programming errors.
    return None if match is None else match.alpha_3
import plotly.express as px

# Attach the ISO code returned by get_iso_code to every row of `data`
data['iso_alpha'] = data['Country'].apply(get_iso_code)

# Keyword arguments shared by all three maps: locate countries by ISO code and
# show the country name on hover.
shared_map_args = dict(locations="iso_alpha", hover_name="Country")

# Map of climate zones
fig = px.choropleth(
    data,
    color="Climate_Zone",
    color_continuous_scale=px.colors.sequential.Plasma,
    **shared_map_args,
)
fig.update_layout(title_text='Climate Zones by Country')
fig.show()

# Calculate the average immunization rate (row-wise mean of the three vaccine columns)
data['Avg_Immunization'] = data[['Hepatitis_B', 'Polio', 'Diphtheria']].mean(axis=1)

# Map of the average immunization rate
fig_immunization = px.choropleth(
    data,
    color="Avg_Immunization",
    color_continuous_scale=px.colors.sequential.Viridis,
    **shared_map_args,
)
fig_immunization.update_layout(title_text='Average Immunization Rates by Country')
fig_immunization.show()

# Map for Under-Five Deaths
fig_deaths = px.choropleth(
    data,
    color="Under_five_deaths",
    color_continuous_scale=px.colors.sequential.OrRd,
    **shared_map_args,
)
fig_deaths.update_layout(title_text='Under-Five Deaths by Country')
fig_deaths.show()